from com.zjs.crawer.urlcontent.baseurlcontent import BaseUrlContent
from com.zjs.zjsqueue import zurlpathqueue

from bs4 import BeautifulSoup as BS
from com.zjs.util.download import request
from com.zjs.zjsqueue import zurlcontentqueue
from queue import Empty
import logging
from com.zjs.crawer.urlcontent.baseurlcontent import BaseUrlContent
import importlib,sys
importlib.reload(sys)


class jobPaths(BaseUrlContent):
    """Crawler worker for the "job" topic.

    Endlessly pulls listing-page URLs off the shared path queue
    (``zurlpathqueue``), fetches each page, and pushes every extracted
    job-detail link onto the content queue (``zurlcontentqueue``).
    """

    # Topic name shared with the path/content queue modules; used as the
    # routing key for both get() and put().
    name = "job"

    def __init__(self):
        logging.debug("jobPaths方法")

    def run(self):
        """Consume URLs from the path queue forever.

        Each URL is handed to :meth:`dojob`.  Any failure is logged and
        the loop continues, so one bad URL cannot kill the worker.
        """
        while True:
            # Pre-initialize so the error log below cannot NameError when
            # zurlpathqueue.get() itself raises before assignment (was a
            # latent bug in the original handler).
            url = None
            try:
                url = zurlpathqueue.get(self.name)
                self.dojob(url)
            except Exception:
                # logging.exception attaches the traceback; lazy %-args
                # avoid building the message unless it is actually emitted.
                # Rendered text is unchanged from the original.
                logging.exception("[path][%s]:%s解析失败", self.name, url)

    def dojob(self, url_b):
        """Fetch *url_b* and enqueue the href of every job link on the page.

        Links are expected inside ``<p class="t1 ">`` elements (note the
        trailing space in the class attribute — it must match the site's
        markup exactly; presumably 51job-style listing pages — TODO confirm).
        """
        html = request.get(url_b, 1)
        soup = BS(html.text, "lxml")
        try:
            for p_tag in soup.find_all("p", attrs={"class": "t1 "}):
                a_tag = p_tag.find("a")
                # Guard per item: a <p> missing its <a> (or an <a> missing
                # href) no longer aborts extraction for the whole page, as
                # the original's page-wide try/except did.
                if a_tag is not None and a_tag.has_attr("href"):
                    zurlcontentqueue.put(self.name, a_tag["href"])
        except Exception:
            # Was a bare `except: pass` that silently swallowed everything
            # (including SystemExit/KeyboardInterrupt). Keep best-effort
            # semantics but record what went wrong.
            logging.exception("[path][%s]: link extraction failed for %s",
                              self.name, url_b)
